Read in data

fert.data <- read_csv(here::here("fert-data.csv"))
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   pH.experim = col_double(),
##   Perc.Fertilization = col_double(),
##   Insemination.mins = col_double(),
##   Fert.success.mins = col_double(),
##   Sperm.pre.exp.time = col_double(),
##   egg.pre.exp.time = col_double(),
##   pH.control = col_double(),
##   pH.delta = col_double(),
##   Sperm.per.uL = col_double(),
##   Sperm.per.mL = col_number(),
##   n.females = col_double(),
##   n.males = col_double(),
##   Yeear = col_double()
## )
## See spec(...) for full column specifications.
fert.data.2 <-
  fert.data %>%
  dplyr::select(pH.experim, Perc.Fertilization, 
         Insemination.mins, Fert.success.mins, Sperm.pre.exp.time, 
         egg.pre.exp.time, pH.delta, Sperm.per.mL, sperm.egg, n.females, n.males) %>%
  mutate_if(is.factor, as.numeric) %>%
  mutate_if(is.character, as.numeric)
## Warning: NAs introduced by coercion
fert.data.3 <-
  fert.data %>%
  mutate_at(c("Phylum", "Common name", "Brooders/Spawniers", "Family", "Taxa", "Species") , as.factor)
fert.data.3$pH.group <- cut(fert.data.3$pH.experim,c(5.9,7.3,7.5,7.8, 8.2))
fert.data.3$Phylum <- factor(fert.data.3$Phylum, levels = c("Echinodermata", "Mollusca","Cnidaria","Crustacean"))
fert.data.4 <- fert.data.3[,c(1,5,7:9,11,12,16:19)] %>%
    mutate_if(is.factor, as.numeric) %>% #convert factors to numeric factors 
    mutate_if(is.character, as.numeric)  #convert character column to numeric 
## Warning: NAs introduced by coercion

Check out correlations among variables

  • Insemination minutes ~ Fertilization success mins - only use insemination minutes
  • pH delta ~ pH experimental - only use pH experimental
    other correlations, but not relevant (e.g. sperm/mL and egg pre-exposure time)
pairs(na.omit(fert.data.4), lower.panel=panel.smooth, upper.panel=panel.cor) 

# save to pdf 
pdf(file = "fert-correlation-panel.pdf", width = 12, height = 8.5)
pairs(na.omit(fert.data.4), lower.panel=panel.smooth, upper.panel=panel.cor) 
dev.off()
## quartz_off_screen 
##                 2

Generate distance matrixusing gowers coefficient

Gowers allows for missing data and multiple data types

dist.gower <- vegdist(fert.data.4, "gower") 

Perform PCoA

Its usage is: cmdscale(d, k, eig = FALSE, add = FALSE) where: • d is a dissimilarity object (generated by dist or vegdist) • k is the number of principal components (PC) that should be extracted from the distance matrix (max number = min(col, rows)-1) • eig, logical. If TRUE eigenvalues for each PC are retuned. Default: FALSE. • add, logical. If TRUE a constant is added to each value in the dissimilarity matrix so that the resulting eigenvalues are non-negative. Default: FALSE.

The principal scores are contained in spe.pcoa$points and the eigenvalues are contained in spe.pcoa$eig.

spe.pcoa <- cmdscale(dist.gower, eig=T, add=T, k=2)
head(spe.pcoa$points, n=15)
##             [,1]        [,2]
##  [1,] -0.6312144 -1.10963653
##  [2,] -0.6659012 -0.91498453
##  [3,] -0.8442947 -0.31278500
##  [4,] -0.8393372 -0.28372659
##  [5,] -0.6733455 -0.98486550
##  [6,] -0.6858787 -0.86243567
##  [7,] -0.6356584 -1.09620511
##  [8,] -0.6406292 -0.99456059
##  [9,] -1.0926354 -0.33980630
## [10,] -1.7569963  0.33231125
## [11,] -1.1538052  0.04844774
## [12,] -1.9954500  0.57209282
## [13,] -2.0591751  1.01716657
## [14,] -2.0589501  1.00410994
## [15,] -0.8746690 -0.95091085
head(spe.pcoa$eig, n=15)
##  [1] 372.08580 213.66589 152.90546  98.83822  85.38434  78.17744  76.82164
##  [8]  71.22881  67.85222  60.90775  59.10680  55.45987  51.75052  50.15431
## [15]  47.64539

Calculate the percent of variation explained by principal coordinates:

hist(spe.pcoa$eig/sum(spe.pcoa$eig)*100)

Compare the eigenvalues to expectations according to the broken stick model.

plot(spe.pcoa$eig[1:100]/sum(spe.pcoa$eig)*100,type="b",lwd=2,col="blue",xlab= "Principal Component from PCoA", ylab="% variation explained", main="% variation explained by PCoA (blue) vs. random expectation (red)")
lines(bstick(100)*100,type="b",lwd=2,col="red")

View the ordination plot.

This plot represents each of the sites in 2-D ordination space (x-axis = principal component 1, y-axis = principal component 2). (Should try to use something relevant for text)

Calculate the PC loadings (i.e., variable weights)

Calculate and depict species loadings (i.e., principal weights in the eigenvectors) on each principal coordinate. Use the function envfit() along with the PC scores from our PCoA object. The function envfit() performs a linear correlation analysis based on standardized data (in other words, a simple linear regression) between each of the original descriptors (i.e., species) and the scores from each principal component. A permutation test is used to assess statistical significance, rather than using the F distribution.

print(vec.sp<-envfit(spe.pcoa$points, k=45, fert.data.4, perm=1000, na.rm=T))
## 
## ***VECTORS
## 
##                        Dim1     Dim2     r2   Pr(>r)    
## Phylum             -0.89708 -0.44188 0.8764 0.000999 ***
## Taxa                0.90930  0.41613 0.8948 0.000999 ***
## pH.experim          0.04069 -0.99917 0.2440 0.000999 ***
## Perc.Fertilization  0.21783 -0.97599 0.7805 0.000999 ***
## Insemination.mins  -0.20953  0.97780 0.0975 0.002997 ** 
## Sperm.pre.exp.time -0.81658  0.57723 0.0596 0.044955 *  
## egg.pre.exp.time   -0.77307  0.63433 0.5632 0.000999 ***
## Sperm.per.mL       -0.78064  0.62498 0.5712 0.000999 ***
## sperm.egg          -0.99769  0.06788 0.6149 0.000999 ***
## n.females          -0.65602  0.75474 0.5176 0.000999 ***
## n.males            -0.67094  0.74151 0.2163 0.000999 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 1000
## 
## 340 observations deleted due to missingness

Plot the eigenvectors on the ordination plot

p.max is the significance level that the species occurrence data must have with either PC in order to be depicted (these p-values were presented in vec.sp).

fert.data.4$pH.group <- cut(fert.data.4$pH.experim, c(6,7.3,7.5,7.8, 8.2))
pl <- ordiplot(spe.pcoa, type = "none",  xlim = c(-1,1.5))
## species scores not available
points(pl, "sites", cex=0.8, pch=c(21,22,23,24)[fert.data.4$pH.group], bg=c("red","blue","green","purple")[fert.data.4$Phylum])
plot(vec.sp, p.max=.01, col="black") # note, p.max = set p-value threshold for plotting vectors.
legend(x="topright", legend = levels(fert.data.3$Phylum), col=c("red","blue","green", "purple"), pch=c(16,16,16,16))
legend(x="right", legend = levels(fert.data.3$pH.group),pch=c(21,22,23,24))

# save to pdf 
pdf(file = "fert.PCoA.pdf", width = 9, height = 8)
pl <- ordiplot(spe.pcoa, type = "none",  xlim = c(-1,1.5))
## species scores not available
points(pl, "sites", cex=0.8, pch=c(21,22,23,24)[fert.data.4$pH.group], bg=c("red","blue","green","purple")[fert.data.4$Phylum])
plot(vec.sp, p.max=.01, col="black") # note, p.max = set p-value threshold for plotting vectors.
legend(x="topright", legend = levels(fert.data.3$Phylum), col=c("red","blue","green", "purple"), pch=c(16,16,16,16))
legend(x="right", legend = levels(fert.data.3$pH.group),pch=c(21,22,23,24))
dev.off()
## quartz_off_screen 
##                 2

Re-run PCoA, leave phylum and taxa out of matrix, but then color code points that way.

dist.gower2 <- vegdist(fert.data.4[c(3:11)], "gower", na.rm = F) 
spe.pcoa2 <- cmdscale(dist.gower2, k=2, eig=T, add=T)
print(vec.sp2<-envfit(spe.pcoa2$points, k=45, fert.data.4[c(3:11)], perm=1000, na.rm=T))
## 
## ***VECTORS
## 
##                        Dim1     Dim2     r2   Pr(>r)    
## pH.experim         -0.73057 -0.68284 0.2953 0.000999 ***
## Perc.Fertilization -0.90933  0.41608 0.9917 0.000999 ***
## Insemination.mins   0.55402  0.83251 0.1267 0.003996 ** 
## Sperm.pre.exp.time  0.46661  0.88446 0.0444 0.065934 .  
## egg.pre.exp.time    0.50435  0.86350 0.6414 0.000999 ***
## Sperm.per.mL        0.50324  0.86415 0.6414 0.000999 ***
## sperm.egg           0.55276  0.83334 0.2141 0.000999 ***
## n.females           0.54147  0.84072 0.6670 0.000999 ***
## n.males             0.54651  0.83745 0.2314 0.000999 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 1000
## 
## 340 observations deleted due to missingness
pl2 <- ordiplot(spe.pcoa2, type = "none",  xlim = c(-1,1.5))
## species scores not available
points(pl2, "sites", cex=1, pch=c(21,22,23,24)[fert.data.4$Phylum],
       bg=c("red","blue","green","purple")[fert.data.4$pH.group])
plot(vec.sp2, p.max=.01, col="black") # note, p.max = set p-value threshold for plotting vectors. 
legend(x="topright", legend = levels(fert.data.3$pH.group), col=c("red","blue","green", "purple"), pch=c(16,16,16,16))
legend(x="bottomright", legend = levels(fert.data.3$Phylum),pch=c(21,22,23,24))

# Save to pdf  
pdf(file = "fert.PCoA-noPhylum.pdf", width = 9, height = 8)
pl2 <- ordiplot(spe.pcoa2, type = "none",  xlim = c(-1,1.5))
## species scores not available
points(pl2, "sites", cex=1, pch=c(21,22,23,24)[fert.data.4$Phylum],
       bg=c("red","blue","green","purple")[fert.data.4$pH.group])
plot(vec.sp2, p.max=.01, col="black") # note, p.max = set p-value threshold for plotting vectors. 
legend(x="topright", legend = levels(fert.data.3$pH.group), col=c("red","blue","green", "purple"), pch=c(16,16,16,16))
legend(x="bottomright", legend = levels(fert.data.3$Phylum),pch=c(21,22,23,24))
dev.off()
## quartz_off_screen 
##                 2

Perform linear regression analysis by phylum

hist(fert.data.4$Perc.Fertilization)

Plot fertilization by experimental pH and phylum

# plot % fert ~ pH.experim by Phylum
ggplotly(fert.data.3 %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization, group=Phylum, col=Phylum, text=`Common name`)) + 
  geom_point(size=1.5, width=0.02) +
  #facet_wrap(~Taxa) +
  geom_smooth(method="lm", se=TRUE, aes(fill=Phylum)) +
  ggtitle("Fertilization Rate ~ pH exposure by phylum"))
## Warning: Ignoring unknown parameters: width
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
ggplotly(fert.data.3 %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization, group=Taxa, col=Taxa, text=`Common name`)) + 
  geom_point(size=1.5, width=0.02) +
  facet_wrap(~Phylum, scale="free") +
  geom_smooth(method="lm", se=TRUE, aes(fill=Taxa)) +
  ggtitle("Fertilization Rate ~ pH exposure by phylum") +
  theme_minimal())
## Warning: Ignoring unknown parameters: width

## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning in qt((1 - level)/2, df): NaNs produced

Mollusca

fert<-subset(fert.data.3, Phylum=="Mollusca")$Perc.Fertilization
ph<-subset(fert.data.3, Phylum=="Mollusca")$pH.experim
summary(model1 <- lm(fert~ph))
## 
## Call:
## lm(formula = fert ~ ph)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -49.139 -17.522   2.456  17.871  54.092 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -82.620     27.636  -2.990  0.00329 ** 
## ph            19.774      3.609   5.479 1.86e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 23.11 on 144 degrees of freedom
## Multiple R-squared:  0.1725, Adjusted R-squared:  0.1668 
## F-statistic: 30.02 on 1 and 144 DF,  p-value: 1.858e-07
plot(ph,fert,pch=21,col="brown",bg="yellow")
abline(model1,col="navy")

summary(model2 <- lm(fert~ph+I(ph^2)))
## 
## Call:
## lm(formula = fert ~ ph + I(ph^2))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -49.919 -16.309   2.201  18.863  54.523 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -566.804    272.269  -2.082   0.0391 *
## ph           155.229     75.867   2.046   0.0426 *
## I(ph^2)       -9.390      5.253  -1.787   0.0760 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22.93 on 143 degrees of freedom
## Multiple R-squared:  0.1906, Adjusted R-squared:  0.1793 
## F-statistic: 16.84 on 2 and 143 DF,  p-value: 2.715e-07
x <- c(5.5, 5.6, 5.7, 5.8, 5.9, 6, 6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7, 6.8, 6.9, 7, 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7, 7.8, 7.9, 8, 8.1, 8.2, 8.3)
y <- predict(model2,list(ph=x))
plot(ph,fert,pch=21,col="brown",bg="yellow")
lines(x,y,col="navy")

anova(model1, model2) # simple model as good as polynomial model. 
## Analysis of Variance Table
## 
## Model 1: fert ~ ph
## Model 2: fert ~ ph + I(ph^2)
##   Res.Df   RSS Df Sum of Sq      F  Pr(>F)  
## 1    144 76898                              
## 2    143 75217  1    1680.5 3.1949 0.07599 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
hist(model1$residuals) #check residuals 

plot(model1)

taxa <- as.factor(subset(fert.data.3, Phylum=="Mollusca")$Taxa)
summary(model2 <- lm(fert~ph+taxa)) #common slope, different intercepts by taxa 
## 
## Call:
## lm(formula = fert ~ ph + taxa)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -45.74 -10.74  -1.31  11.32  37.16 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -77.857     21.707  -3.587 0.000462 ***
## ph            21.792      2.733   7.975 4.88e-13 ***
## taxaclam     -39.129      4.849  -8.069 2.88e-13 ***
## taxamussel    -0.952      4.873  -0.195 0.845387    
## taxaoyster   -30.745      4.343  -7.080 6.40e-11 ***
## taxascallop  -12.163      6.462  -1.882 0.061884 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.82 on 140 degrees of freedom
## Multiple R-squared:  0.5739, Adjusted R-squared:  0.5587 
## F-statistic: 37.71 on 5 and 140 DF,  p-value: < 2.2e-16
anova(model1, model2) # definitely need to include sep. lines for taxa w/ diff intercepts 
## Analysis of Variance Table
## 
## Model 1: fert ~ ph
## Model 2: fert ~ ph + taxa
##   Res.Df   RSS Df Sum of Sq      F    Pr(>F)    
## 1    144 76898                                  
## 2    140 39599  4     37299 32.967 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model3 <- lm(fert~ph*taxa)) # test for diff slopes and intercepts 
## 
## Call:
## lm(formula = fert ~ ph * taxa)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -45.940 -10.699  -1.237  11.939  37.034 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)
## (Intercept)    -106.669    136.601  -0.781    0.436
## ph               25.472     17.441   1.461    0.146
## taxaclam       -123.099    152.267  -0.808    0.420
## taxamussel       28.897    140.807   0.205    0.838
## taxaoyster       14.390    139.815   0.103    0.918
## taxascallop     133.018    188.389   0.706    0.481
## ph:taxaclam      11.097     19.537   0.568    0.571
## ph:taxamussel    -3.820     18.038  -0.212    0.833
## ph:taxaoyster    -5.803     17.864  -0.325    0.746
## ph:taxascallop  -18.599     24.090  -0.772    0.441
## 
## Residual standard error: 16.82 on 136 degrees of freedom
## Multiple R-squared:  0.586,  Adjusted R-squared:  0.5586 
## F-statistic: 21.39 on 9 and 136 DF,  p-value: < 2.2e-16
anova(model2, model3) # don't need diff slopes 
## Analysis of Variance Table
## 
## Model 1: fert ~ ph + taxa
## Model 2: fert ~ ph * taxa
##   Res.Df   RSS Df Sum of Sq      F Pr(>F)
## 1    140 39599                           
## 2    136 38476  4    1123.1 0.9925  0.414
summary(model4 <- lm(fert~ph+I(ph^2) + taxa)) #test polynomial with taxa 
## 
## Call:
## lm(formula = fert ~ ph + I(ph^2) + taxa)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -36.863 -10.514   0.447   8.640  37.702 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -597.5530   203.8928  -2.931  0.00395 ** 
## ph           167.1563    56.7824   2.944  0.00380 ** 
## I(ph^2)      -10.0811     3.9335  -2.563  0.01144 *  
## taxaclam     -40.3816     4.7805  -8.447 3.57e-14 ***
## taxamussel    -0.7367     4.7797  -0.154  0.87773    
## taxaoyster   -29.2334     4.2994  -6.799 2.85e-10 ***
## taxascallop  -11.9372     6.3381  -1.883  0.06173 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.49 on 139 degrees of freedom
## Multiple R-squared:  0.5931, Adjusted R-squared:  0.5755 
## F-statistic: 33.77 on 6 and 139 DF,  p-value: < 2.2e-16
anova(model2, model4) # improves the model 
## Analysis of Variance Table
## 
## Model 1: fert ~ ph + taxa
## Model 2: fert ~ ph + I(ph^2) + taxa
##   Res.Df   RSS Df Sum of Sq      F  Pr(>F)  
## 1    140 39599                              
## 2    139 37812  1    1786.8 6.5683 0.01144 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model5 <- lm(fert~ph+I(ph^2)+I(ph^3)+ taxa)) #<----------final model 
## 
## Call:
## lm(formula = fert ~ ph + I(ph^2) + I(ph^3) + taxa)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -39.526 -10.396   0.056  10.468  33.947 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -9401.480   2385.708  -3.941 0.000129 ***
## ph           3887.105   1006.131   3.863 0.000171 ***
## I(ph^2)      -530.433    140.584  -3.773 0.000239 ***
## I(ph^3)        24.116      6.513   3.703 0.000308 ***
## taxaclam      -40.635      4.576  -8.879 3.19e-15 ***
## taxamussel     -2.877      4.611  -0.624 0.533666    
## taxaoyster    -31.339      4.155  -7.543 5.56e-12 ***
## taxascallop   -14.001      6.092  -2.298 0.023058 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.79 on 138 degrees of freedom
## Multiple R-squared:  0.6299, Adjusted R-squared:  0.6111 
## F-statistic: 33.55 on 7 and 138 DF,  p-value: < 2.2e-16
anova(model4, model5) # improves the model 
## Analysis of Variance Table
## 
## Model 1: fert ~ ph + I(ph^2) + taxa
## Model 2: fert ~ ph + I(ph^2) + I(ph^3) + taxa
##   Res.Df   RSS Df Sum of Sq     F    Pr(>F)    
## 1    139 37812                                 
## 2    138 34395  1    3417.1 13.71 0.0003075 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AIC(model1, model2, model3, model4, model5) #AIC confirms that model5 is best 
##        df      AIC
## model1  3 1335.258
## model2  7 1246.362
## model3 11 1250.161
## model4  8 1241.621
## model5  9 1229.792
hist(model5$residuals) #residuals look pretty normal 

plot(model5) #i see no major issues with residuals 

lm.mollusc <- lm(fert~ph+I(ph^2)+I(ph^3)+ taxa) #<----------rename model 

Mollusca - Generate predictions from fitted model to plot

summary(lm.mollusc)
## 
## Call:
## lm(formula = fert ~ ph + I(ph^2) + I(ph^3) + taxa)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -39.526 -10.396   0.056  10.468  33.947 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -9401.480   2385.708  -3.941 0.000129 ***
## ph           3887.105   1006.131   3.863 0.000171 ***
## I(ph^2)      -530.433    140.584  -3.773 0.000239 ***
## I(ph^3)        24.116      6.513   3.703 0.000308 ***
## taxaclam      -40.635      4.576  -8.879 3.19e-15 ***
## taxamussel     -2.877      4.611  -0.624 0.533666    
## taxaoyster    -31.339      4.155  -7.543 5.56e-12 ***
## taxascallop   -14.001      6.092  -2.298 0.023058 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.79 on 138 degrees of freedom
## Multiple R-squared:  0.6299, Adjusted R-squared:  0.6111 
## F-statistic: 33.55 on 7 and 138 DF,  p-value: < 2.2e-16
predict.mollusc <- predict(lm.mollusc, interval = 'confidence')
predict.mollusc.df <- predict.mollusc %>%
  as.data.frame() %>%
  cbind(subset(fert.data.3, Phylum=="Mollusca")$pH.experim, as.factor(subset(fert.data.3, Phylum=="Mollusca")$Taxa)) %>% 
purrr::set_names(c("fit", "lwr", "upr", "ph", "taxa"))
predict.mollusc.df$taxa  <- factor(predict.mollusc.df$taxa, levels=c("abalone", "mussel", "scallop", "oyster", "clam"))

Mollusca - Plot fertilization data with fitted models

fert.data.3 %>%
  filter(Phylum=="Mollusca") %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization, group=Taxa, text=`Common name`)) + 
  geom_jitter(size=1.2, width=0.03) +
  #facet_wrap(~Phylum, scales="free") + theme_minimal() +
  #geom_smooth(method="lm", se=F, aes(col=Taxa), formula=y ~ poly(x, 2, raw=TRUE)) +
  geom_line(aes(pH.experim, predict.mollusc.df$fit, col=predict.mollusc.df$taxa)) +
  ggtitle("Mollusca") + 
  xlab("Experimental pH") + ylab("Fertilization %") +
  scale_color_manual(name="Taxa",
      values=c(abalone="#e41a1c",mussel="#4daf4a",scallop="#ff7f00",oyster="#984ea3",clam='#377eb8')) +
  theme_minimal()

Echinoderms

fert<-subset(fert.data.3, Phylum=="Echinodermata")$Perc.Fertilization
ph<-subset(fert.data.3, Phylum=="Echinodermata")$pH.experim
taxa <- as.factor(subset(fert.data.3, Phylum=="Echinodermata")$Taxa)

summary(model1 <- lm(fert~ph))
## 
## Call:
## lm(formula = fert ~ ph)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -75.318 -10.303   6.583  17.683  53.083 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -239.589     31.313  -7.651 5.29e-13 ***
## ph            40.501      4.073   9.943  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 26 on 232 degrees of freedom
## Multiple R-squared:  0.2988, Adjusted R-squared:  0.2958 
## F-statistic: 98.86 on 1 and 232 DF,  p-value: < 2.2e-16
hist(model1$residuals)

plot(ph,fert,pch=21,col="brown",bg="yellow")
abline(model1,col="navy")

x <- c(5.5, 5.6, 5.7, 5.8, 5.9, 6, 6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7, 6.8, 6.9, 7, 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7, 7.8, 7.9, 8, 8.1, 8.2, 8.3)

summary(model2 <- lm(fert~ph+I(ph^2)))
## 
## Call:
## lm(formula = fert ~ ph + I(ph^2))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -74.718  -7.789   9.085  17.002  51.480 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1101.486    314.930  -3.498 0.000563 ***
## ph            274.791     85.290   3.222 0.001457 ** 
## I(ph^2)       -15.848      5.763  -2.750 0.006431 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 25.64 on 231 degrees of freedom
## Multiple R-squared:  0.321,  Adjusted R-squared:  0.3152 
## F-statistic: 54.61 on 2 and 231 DF,  p-value: < 2.2e-16
y <- predict(model2,list(ph=x))
plot(ph,fert,pch=21,col="brown",bg="yellow")
lines(x,y,col="navy")

anova(model1, model2) 
## Analysis of Variance Table
## 
## Model 1: fert ~ ph
## Model 2: fert ~ ph + I(ph^2)
##   Res.Df    RSS Df Sum of Sq      F   Pr(>F)   
## 1    232 156810                                
## 2    231 151839  1    4971.1 7.5627 0.006431 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model1 <- lm(fert~ph)) 
## 
## Call:
## lm(formula = fert ~ ph)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -75.318 -10.303   6.583  17.683  53.083 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -239.589     31.313  -7.651 5.29e-13 ***
## ph            40.501      4.073   9.943  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 26 on 232 degrees of freedom
## Multiple R-squared:  0.2988, Adjusted R-squared:  0.2958 
## F-statistic: 98.86 on 1 and 232 DF,  p-value: < 2.2e-16
summary(model2 <- lm(fert~ph+I(ph^2))) 
## 
## Call:
## lm(formula = fert ~ ph + I(ph^2))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -74.718  -7.789   9.085  17.002  51.480 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1101.486    314.930  -3.498 0.000563 ***
## ph            274.791     85.290   3.222 0.001457 ** 
## I(ph^2)       -15.848      5.763  -2.750 0.006431 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 25.64 on 231 degrees of freedom
## Multiple R-squared:  0.321,  Adjusted R-squared:  0.3152 
## F-statistic: 54.61 on 2 and 231 DF,  p-value: < 2.2e-16
anova(model1, model2) #2nd order polynomial better fit than straight line 
## Analysis of Variance Table
## 
## Model 1: fert ~ ph
## Model 2: fert ~ ph + I(ph^2)
##   Res.Df    RSS Df Sum of Sq      F   Pr(>F)   
## 1    232 156810                                
## 2    231 151839  1    4971.1 7.5627 0.006431 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model3 <- lm(fert~ph+taxa)) # test different intercepts by taxa, common slope 
## 
## Call:
## lm(formula = fert ~ ph + taxa)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -76.846  -9.951   4.468  16.030  62.004 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -266.804     30.053  -8.878  < 2e-16 ***
## ph               39.891      3.851  10.360  < 2e-16 ***
## taxaSea star     38.990      7.868   4.956 1.40e-06 ***
## taxaSea urchin   33.623      6.390   5.262 3.27e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 24.56 on 230 degrees of freedom
## Multiple R-squared:  0.3797, Adjusted R-squared:  0.3716 
## F-statistic: 46.93 on 3 and 230 DF,  p-value: < 2.2e-16
anova(model1, model3) #different intercept by taxa improves model 
## Analysis of Variance Table
## 
## Model 1: fert ~ ph
## Model 2: fert ~ ph + taxa
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1    232 156810                                  
## 2    230 138714  2     18096 15.003 7.511e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model4 <- lm(fert~ph*taxa))# test diff slopes AND intercepts by taxa 
## 
## Call:
## lm(formula = fert ~ ph * taxa)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -76.629  -9.125   4.466  16.097  62.140 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       -292.7922   112.1737  -2.610  0.00965 **
## ph                  43.2924    14.6602   2.953  0.00348 **
## taxaSea star        36.8277   150.4571   0.245  0.80685   
## taxaSea urchin      64.7199   116.7554   0.554  0.57990   
## ph:taxaSea star      0.2466    19.5799   0.013  0.98996   
## ph:taxaSea urchin   -4.0673    15.2539  -0.267  0.78999   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 24.66 on 228 degrees of freedom
## Multiple R-squared:  0.3802, Adjusted R-squared:  0.3666 
## F-statistic: 27.97 on 5 and 228 DF,  p-value: < 2.2e-16
anova(model3, model4) # slopes not important 
## Analysis of Variance Table
## 
## Model 1: fert ~ ph + taxa
## Model 2: fert ~ ph * taxa
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1    230 138714                           
## 2    228 138618  2    95.941 0.0789 0.9242
summary(model5 <- lm(fert~ph+taxa+I(ph^2))) #test 2nd order polynomial with taxa intercepts 
## 
## Call:
## lm(formula = fert ~ ph + taxa + I(ph^2))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -75.011  -7.635   7.262  15.482  58.831 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -1151.728    296.737  -3.881 0.000136 ***
## ph               280.369     80.327   3.490 0.000579 ***
## taxaSea star      39.174      7.735   5.064 8.42e-07 ***
## taxaSea urchin    33.925      6.283   5.400 1.67e-07 ***
## I(ph^2)          -16.266      5.427  -2.997 0.003026 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 24.14 on 229 degrees of freedom
## Multiple R-squared:  0.4031, Adjusted R-squared:  0.3927 
## F-statistic: 38.67 on 4 and 229 DF,  p-value: < 2.2e-16
anova(model3, model5) # adding 2nd order improves ph+taxa model. 
## Analysis of Variance Table
## 
## Model 1: fert ~ ph + taxa
## Model 2: fert ~ ph + taxa + I(ph^2)
##   Res.Df    RSS Df Sum of Sq      F   Pr(>F)   
## 1    230 138714                                
## 2    229 133478  1    5235.6 8.9824 0.003026 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(model2, model5) # adding taxa improves ph+2nd order model. 
## Analysis of Variance Table
## 
## Model 1: fert ~ ph + I(ph^2)
## Model 2: fert ~ ph + taxa + I(ph^2)
##   Res.Df    RSS Df Sum of Sq     F  Pr(>F)    
## 1    231 151839                               
## 2    229 133478  2     18361 15.75 3.9e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model6 <- lm(fert~ph+I(ph^2)+I(ph^3)+ taxa)) # test adding 3rd order <----------final model 
## 
## Call:
## lm(formula = fert ~ ph + I(ph^2) + I(ph^3) + taxa)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -75.790  -7.006   7.503  14.710  56.847 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     7251.801   3050.313   2.377  0.01826 *  
## ph             -3222.939   1268.245  -2.541  0.01171 *  
## I(ph^2)          468.019    175.057   2.674  0.00805 ** 
## I(ph^3)          -22.205      8.023  -2.768  0.00611 ** 
## taxaSea star      38.558      7.628   5.055 8.85e-07 ***
## taxaSea urchin    32.236      6.224   5.180 4.89e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 23.8 on 228 degrees of freedom
## Multiple R-squared:  0.4225, Adjusted R-squared:  0.4099 
## F-statistic: 33.37 on 5 and 228 DF,  p-value: < 2.2e-16
anova(model5, model6) # adding 3rd order improves model 
## Analysis of Variance Table
## 
## Model 1: fert ~ ph + taxa + I(ph^2)
## Model 2: fert ~ ph + I(ph^2) + I(ph^3) + taxa
##   Res.Df    RSS Df Sum of Sq      F   Pr(>F)   
## 1    229 133478                                
## 2    228 129139  1    4338.8 7.6603 0.006109 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
hist(model6$residuals) #residuals kind of normal ?

plot(model6) #doesn't look totally okay. should follow up. 

lm.echino <- lm(fert~ph+I(ph^2)+I(ph^3)+ taxa)

Echinoderm - Generate predictions from fitted model to plot

summary(lm.echino)
## 
## Call:
## lm(formula = fert ~ ph + I(ph^2) + I(ph^3) + taxa)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -75.790  -7.006   7.503  14.710  56.847 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     7251.801   3050.313   2.377  0.01826 *  
## ph             -3222.939   1268.245  -2.541  0.01171 *  
## I(ph^2)          468.019    175.057   2.674  0.00805 ** 
## I(ph^3)          -22.205      8.023  -2.768  0.00611 ** 
## taxaSea star      38.558      7.628   5.055 8.85e-07 ***
## taxaSea urchin    32.236      6.224   5.180 4.89e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 23.8 on 228 degrees of freedom
## Multiple R-squared:  0.4225, Adjusted R-squared:  0.4099 
## F-statistic: 33.37 on 5 and 228 DF,  p-value: < 2.2e-16
predict.echino <- predict(lm.echino, interval = 'confidence')
predict.echino.df <- predict.echino %>%
  as.data.frame() %>%
  cbind(subset(fert.data.3, Phylum=="Echinodermata")$pH.experim, as.factor(subset(fert.data.3, Phylum=="Echinodermata")$Taxa)) %>% 
purrr::set_names(c("fit", "lwr", "upr", "ph", "taxa"))

predict.echino.df$taxa  <- factor(predict.echino.df$taxa, levels=c("Sea star", "Sea urchin", "Sand dollar"))

Echinoderm - Plot fertilization data with fitted models

fert.data.3 %>%
  filter(Phylum=="Echinodermata") %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization, group=Taxa, text=`Common name`)) + 
  geom_jitter(size=1.2, width=0.03) +
  #facet_wrap(~Phylum, scales="free") + theme_minimal() +
  #geom_smooth(method="lm", se=F, aes(col=Taxa), formula=y ~ poly(x, 2, raw=TRUE)) +
  geom_line(aes(pH.experim, predict.echino.df$fit, col=predict.echino.df$taxa)) +
  ggtitle("Echinodermata") + 
  xlab("Experimental pH") + ylab("Fertilization %") +
  scale_color_manual(name="Taxa",
      values=c(`Sand dollar`="#e41a1c", 
               `Sea star`="#4daf4a",
               `Sea urchin`="#ff7f00")) + theme_minimal()

Cnidaria

fert<-subset(fert.data.3, Phylum=="Cnidaria")$Perc.Fertilization
ph<-subset(fert.data.3, Phylum=="Cnidaria")$pH.experim
sperm <- subset(fert.data.3, Phylum=="Cnidaria")$Sperm.per.mL
ph.group <- as.factor(subset(fert.data.3, Phylum=="Cnidaria")$pH.group)
x <- c(7.4, 7.5, 7.6, 7.7, 7.8, 7.9, 8, 8.1, 8.2, 8.3)

summary(model0 <- lm(fert~ph)) #pH not sign. alone 
## 
## Call:
## lm(formula = fert ~ ph)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -64.637 -25.248  -4.231  33.430  49.855 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  -331.51     230.16  -1.440   0.1551  
## ph             48.93      29.14   1.679   0.0985 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 33.27 on 59 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.04559,    Adjusted R-squared:  0.02942 
## F-statistic: 2.819 on 1 and 59 DF,  p-value: 0.09847
summary(model1 <- lm(fert~ph.group)) # ph.group not sign. alone 
## 
## Call:
## lm(formula = fert ~ ph.group)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -58.95 -25.12  -5.87  35.03  51.23 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         47.768      7.005   6.819 5.47e-09 ***
## ph.group(7.8,8.2]   11.352      8.875   1.279    0.206    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 33.59 on 59 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.02698,    Adjusted R-squared:  0.01049 
## F-statistic: 1.636 on 1 and 59 DF,  p-value: 0.2059
summary(model2 <- lm(fert~sperm)) # sperm concentration sign. factor 
## 
## Call:
## lm(formula = fert ~ sperm)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -52.190 -19.684  -6.424  34.686  46.936 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 5.199e+01  4.276e+00  12.158   <2e-16 ***
## sperm       7.840e-07  3.411e-07   2.299    0.025 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 32.41 on 60 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.08095,    Adjusted R-squared:  0.06563 
## F-statistic: 5.285 on 1 and 60 DF,  p-value: 0.02501
summary(model3 <- lm(fert~sperm+ph)) # pH.group not sign. after controlling for sperm conc. intercept 
## 
## Call:
## lm(formula = fert ~ sperm + ph)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -62.388 -22.842  -2.913  32.158  51.155 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -3.406e+02  2.219e+02  -1.535   0.1302  
## sperm        7.909e-07  3.376e-07   2.343   0.0226 *
## ph           4.974e+01  2.810e+01   1.770   0.0819 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 32.07 on 58 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.1281, Adjusted R-squared:  0.09806 
## F-statistic: 4.262 on 2 and 58 DF,  p-value: 0.01876
summary(model4 <- lm(fert~sperm+ph.group)) # pH.group not sign. after controlling for sperm conc. intercept 
## 
## Call:
## lm(formula = fert ~ sperm + ph.group)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -56.882 -22.771  -4.626  31.833  53.144 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       4.436e+01  6.896e+00   6.432 2.62e-08 ***
## sperm             8.092e-07  3.409e-07   2.374   0.0209 *  
## ph.group(7.8,8.2] 1.241e+01  8.557e+00   1.450   0.1525    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 32.35 on 58 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.1131, Adjusted R-squared:  0.08256 
## F-statistic:   3.7 on 2 and 58 DF,  p-value: 0.03074
summary(model5 <- lm(fert~sperm*ph)) # pH not sign. after controlling for sperm conc. intercept 
## 
## Call:
## lm(formula = fert ~ sperm * ph)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -62.867 -23.524  -3.503  31.476  51.425 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -3.635e+02  2.298e+02  -1.582   0.1192  
## sperm        1.066e-05  2.307e-05   0.462   0.6460  
## ph           5.264e+01  2.910e+01   1.809   0.0757 .
## sperm:ph    -1.249e-06  2.922e-06  -0.428   0.6706  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 32.3 on 57 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.1309, Adjusted R-squared:  0.08517 
## F-statistic: 2.862 on 3 and 57 DF,  p-value: 0.04465
summary(model6 <- lm(fert~sperm*ph.group)) # ph.group not sign. after controlling for sperm 
## 
## Call:
## lm(formula = fert ~ sperm * ph.group)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -57.24 -23.18  -4.75  32.72  53.67 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              4.374e+01  7.099e+00   6.162 7.79e-08 ***
## sperm                    9.548e-07  4.888e-07   1.953   0.0557 .  
## ph.group(7.8,8.2]        1.343e+01  8.961e+00   1.499   0.1394    
## sperm:ph.group(7.8,8.2] -2.874e-07  6.867e-07  -0.419   0.6771    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 32.58 on 57 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.1159, Adjusted R-squared:  0.06933 
## F-statistic:  2.49 on 3 and 57 DF,  p-value: 0.06941
summary(model3 <- lm(fert~sperm+ph)) 
## 
## Call:
## lm(formula = fert ~ sperm + ph)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -62.388 -22.842  -2.913  32.158  51.155 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -3.406e+02  2.219e+02  -1.535   0.1302  
## sperm        7.909e-07  3.376e-07   2.343   0.0226 *
## ph           4.974e+01  2.810e+01   1.770   0.0819 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 32.07 on 58 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.1281, Adjusted R-squared:  0.09806 
## F-statistic: 4.262 on 2 and 58 DF,  p-value: 0.01876
summary(model7 <- lm(fert~sperm+ph+I(ph^2)))
## 
## Call:
## lm(formula = fert ~ sperm + ph + I(ph^2))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -62.169 -22.309  -2.716  32.691  51.073 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -1.245e+03  1.502e+04  -0.083   0.9342  
## sperm        7.887e-07  3.425e-07   2.303   0.0249 *
## ph           2.787e+02  3.801e+03   0.073   0.9418  
## I(ph^2)     -1.448e+01  2.403e+02  -0.060   0.9522  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 32.35 on 57 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.1282, Adjusted R-squared:  0.0823 
## F-statistic: 2.794 on 3 and 57 DF,  p-value: 0.04842
summary(model8 <- lm(fert~sperm+ph+I(ph^2)+I(ph^3))) 
## 
## Call:
## lm(formula = fert ~ sperm + ph + I(ph^2) + I(ph^3))
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -60.56 -24.36  -3.36  29.84  53.87 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -1.127e+06  9.981e+05  -1.129   0.2638  
## sperm        7.849e-07  3.417e-07   2.297   0.0254 *
## ph           4.265e+05  3.779e+05   1.128   0.2639  
## I(ph^2)     -5.380e+04  4.769e+04  -1.128   0.2641  
## I(ph^3)      2.262e+03  2.006e+03   1.128   0.2642  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 32.28 on 56 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.1475, Adjusted R-squared:  0.08665 
## F-statistic: 2.423 on 4 and 56 DF,  p-value: 0.05875
summary(model9 <- lm(fert~ph+sperm+I(sperm^2))) #<--- lowest AIC 
## 
## Call:
## lm(formula = fert ~ ph + sperm + I(sperm^2))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -59.882 -22.759  -2.132  25.616  53.629 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -3.741e+02  2.168e+02  -1.726   0.0898 .
## ph           5.337e+01  2.743e+01   1.946   0.0566 .
## sperm        5.355e-06  2.272e-06   2.357   0.0219 *
## I(sperm^2)  -6.703e-14  3.302e-14  -2.030   0.0470 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 31.24 on 57 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.1869, Adjusted R-squared:  0.1441 
## F-statistic: 4.368 on 3 and 57 DF,  p-value: 0.007748
summary(model10 <- lm(fert~ph+sperm+I(sperm^2)+I(sperm^3))) 
## 
## Call:
## lm(formula = fert ~ ph + sperm + I(sperm^2) + I(sperm^3))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -60.621 -21.172  -4.073  26.612  54.165 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -3.650e+02  2.167e+02  -1.685   0.0976 .
## ph           5.261e+01  2.740e+01   1.920   0.0599 .
## sperm       -1.298e-06  6.605e-06  -0.197   0.8449  
## I(sperm^2)   9.018e-13  9.039e-13   0.998   0.3227  
## I(sperm^3)  -1.272e-20  1.186e-20  -1.073   0.2881  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 31.2 on 56 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.2033, Adjusted R-squared:  0.1464 
## F-statistic: 3.572 on 4 and 56 DF,  p-value: 0.01153
summary(model11 <- lm(fert~ph.group+sperm+I(sperm^2))) 
## 
## Call:
## lm(formula = fert ~ ph.group + sperm + I(sperm^2))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -53.929 -22.458  -6.036  30.681  55.359 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        3.948e+01  7.203e+00   5.481 9.99e-07 ***
## ph.group(7.8,8.2]  1.281e+01  8.368e+00   1.531   0.1314    
## sperm              5.176e-06  2.296e-06   2.255   0.0280 *  
## I(sperm^2)        -6.413e-14  3.336e-14  -1.923   0.0595 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 31.62 on 57 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.1671, Adjusted R-squared:  0.1233 
## F-statistic: 3.813 on 3 and 57 DF,  p-value: 0.01466
AIC(model0, model1, model2, model3, model4, model5, model6, model7, model8, model9, model10, model11) 
## Warning in AIC.default(model0, model1, model2, model3, model4, model5,
## model6, : models are not all fitted to the same number of observations
##         df      AIC
## model0   3 604.6512
## model1   3 605.8295
## model2   3 611.2413
## model3   4 601.1340
## model4   4 602.1735
## model5   5 602.9387
## model6   5 603.9863
## model7   5 603.1301
## model8   6 603.7602
## model9   5 598.8751
## model10  6 599.6347
## model11  5 600.3409
anova(model3, model9) #model 9 improves model 3 by adding a 2nd order polynomial sperm variable 
## Analysis of Variance Table
## 
## Model 1: fert ~ sperm + ph
## Model 2: fert ~ ph + sperm + I(sperm^2)
##   Res.Df   RSS Df Sum of Sq      F  Pr(>F)  
## 1     58 59665                              
## 2     57 55642  1    4023.6 4.1219 0.04701 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
hist(model9$residuals) #kinda normal 

plot(model9) #somewhat concerning ... check back later 

lm.cnid <- lm(fert~ph+sperm+I(sperm^2), na.action = na.exclude)

Cnidaria - Generate predictions from fitted model to plot

summary(lm.cnid)
## 
## Call:
## lm(formula = fert ~ ph + sperm + I(sperm^2), na.action = na.exclude)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -59.882 -22.759  -2.132  25.616  53.629 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -3.741e+02  2.168e+02  -1.726   0.0898 .
## ph           5.337e+01  2.743e+01   1.946   0.0566 .
## sperm        5.355e-06  2.272e-06   2.357   0.0219 *
## I(sperm^2)  -6.703e-14  3.302e-14  -2.030   0.0470 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 31.24 on 57 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.1869, Adjusted R-squared:  0.1441 
## F-statistic: 4.368 on 3 and 57 DF,  p-value: 0.007748
predict.cnid <- predict(lm.cnid, interval = 'confidence') 
predict.cnid.df <- cbind(as.data.frame(predict.cnid),
        subset(fert.data.3, Phylum=="Cnidaria")$pH.experim,
        subset(fert.data.3, Phylum=="Cnidaria")$Sperm.per.mL) %>%
  purrr::set_names(c("fit", "lwr", "upr", "ph", "sperm"))

Cnidaria - Plot fertilization data with fitted models

fert.data.3 %>%
  filter(Phylum=="Cnidaria") %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization, group=Taxa, text=`Common name`)) + 
  geom_jitter(size=1.2, width=0.03) +
  #facet_wrap(~Phylum, scales="free") + theme_minimal() +
  #geom_smooth(method="lm", se=F, aes(col=Taxa), formula=y ~ poly(x, 2, raw=TRUE)) +
  geom_line(aes(pH.experim, predict.cnid.df$fit)) +
  ggtitle("Cnidaria") + 
  xlab("Experimental pH") + ylab("Fertilization %") +
  theme_minimal()
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_path).

fert.data.3 %>%
  filter(Phylum=="Cnidaria") %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization, group=Taxa, text=`Common name`)) + 
  geom_jitter(size=1.2, width=0.03) +
  #facet_wrap(~Phylum, scales="free") + theme_minimal() +
  geom_smooth(method="lm", se=F, col="#4daf4a", size=0.6) +
  #geom_line(aes(pH.experim, predict.cnid.df$fit)) +
  ggtitle("Cnidaria") + 
  xlab("Experimental pH") + ylab("Fertilization %") +
  theme_minimal()
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).

Crustacea

fert<-subset(fert.data.3, Phylum=="Crustacean")$Perc.Fertilization
ph<-subset(fert.data.3, Phylum=="Crustacean")$pH.experim
taxa<-subset(fert.data.3, Phylum=="Crustacean")$Taxa
#x <- c(6.6, 6.7, 6.8, 6.9, 7, 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7, 7.8, 7.9, 8, 8.1, 8.2, 8.3, 8.4)

summary(model0 <- lm(fert~ph)) #pH NOT sign. alone 
## 
## Call:
## lm(formula = fert ~ ph)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -43.36 -31.23  16.33  20.87  29.80 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   -44.42     117.02  -0.380    0.709
## ph             13.10      15.28   0.857    0.404
## 
## Residual standard error: 29.49 on 16 degrees of freedom
## Multiple R-squared:  0.04388,    Adjusted R-squared:  -0.01588 
## F-statistic: 0.7343 on 1 and 16 DF,  p-value: 0.4041
summary(model1 <- lm(fert~taxa)) #taxa sign. alone 
## 
## Call:
## lm(formula = fert ~ taxa)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -62.380  -2.553   2.936  13.592  24.562 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)    16.97      10.68   1.589  0.13296   
## taxacopepod    49.74      12.33   4.034  0.00108 **
## taxacrab       49.93      18.50   2.699  0.01648 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21.36 on 15 degrees of freedom
## Multiple R-squared:  0.5297, Adjusted R-squared:  0.467 
## F-statistic: 8.446 on 2 and 15 DF,  p-value: 0.003492
summary(model2 <- lm(fert~taxa+ph)) # pH.group not sign. after controlling for taxa intercept 
## 
## Call:
## lm(formula = fert ~ taxa + ph)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -50.008  -3.816   3.623   8.130  20.837 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -133.04      81.27  -1.637 0.123872    
## taxacopepod    52.86      11.55   4.575 0.000432 ***
## taxacrab       49.93      17.15   2.912 0.011363 *  
## ph             19.36      10.41   1.860 0.084055 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.8 on 14 degrees of freedom
## Multiple R-squared:  0.6228, Adjusted R-squared:  0.542 
## F-statistic: 7.707 on 3 and 14 DF,  p-value: 0.002791
anova(model1,model2) #pH does not improve the taxa only model 
## Analysis of Variance Table
## 
## Model 1: fert ~ taxa
## Model 2: fert ~ taxa + ph
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
## 1     15 6843.2                              
## 2     14 5487.5  1    1355.7 3.4588 0.08406 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model3 <- lm(fert~taxa*ph)) # pH.group not sign. after controlling for taxa intercept 
## 
## Call:
## lm(formula = fert ~ taxa * ph)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -49.838  -0.120   2.592   7.777  20.989 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)
## (Intercept)       11.00     325.50   0.034    0.974
## taxacopepod      -93.21     337.36  -0.276    0.787
## taxacrab        -328.50     563.79  -0.583    0.571
## ph                 0.77      41.98   0.018    0.986
## taxacopepod:ph    18.85      43.57   0.433    0.673
## taxacrab:ph       48.83      72.71   0.672    0.515
## 
## Residual standard error: 20.99 on 12 degrees of freedom
## Multiple R-squared:  0.6367, Adjusted R-squared:  0.4853 
## F-statistic: 4.205 on 5 and 12 DF,  p-value: 0.01931
anova(model1, model3) #pH interacion does not improve the taxa only model 
## Analysis of Variance Table
## 
## Model 1: fert ~ taxa
## Model 2: fert ~ taxa * ph
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     15 6843.2                           
## 2     12 5286.6  3    1556.7 1.1778 0.3591
summary(model4 <- lm(fert~taxa+ph+I(ph^2)))
## 
## Call:
## lm(formula = fert ~ taxa + ph + I(ph^2))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.647  -2.847   2.097   7.278  26.587 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -3781.13    1850.50  -2.043 0.061838 .  
## taxacopepod    64.97      12.18   5.335 0.000135 ***
## taxacrab       49.93      15.61   3.199 0.006981 ** 
## ph            977.86     485.91   2.012 0.065366 .  
## I(ph^2)       -62.87      31.87  -1.973 0.070146 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18.02 on 13 degrees of freedom
## Multiple R-squared:  0.7098, Adjusted R-squared:  0.6204 
## F-statistic: 7.947 on 4 and 13 DF,  p-value: 0.001808
anova(model1, model4) #interesting - sign. 
## Analysis of Variance Table
## 
## Model 1: fert ~ taxa
## Model 2: fert ~ taxa + ph + I(ph^2)
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
## 1     15 6843.2                              
## 2     13 4223.0  2    2620.2 4.0331 0.04338 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model5 <- lm(fert~taxa+ph+I(ph^2)+I(ph^3))) # <--- final model 
## 
## Call:
## lm(formula = fert ~ taxa + ph + I(ph^2) + I(ph^3))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.619  -6.728   1.979   5.331  35.003 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 105184.42   57490.39   1.830 0.092251 .  
## taxacopepod     91.46      17.86   5.122 0.000252 ***
## taxacrab        49.93      14.25   3.504 0.004351 ** 
## ph          -43017.45   23206.21  -1.854 0.088515 .  
## I(ph^2)       5843.13    3114.80   1.876 0.085195 .  
## I(ph^3)       -263.63     139.03  -1.896 0.082265 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.46 on 12 degrees of freedom
## Multiple R-squared:  0.7767, Adjusted R-squared:  0.6836 
## F-statistic: 8.346 on 5 and 12 DF,  p-value: 0.001323
anova(model4, model5) #not a sign. improvement 
## Analysis of Variance Table
## 
## Model 1: fert ~ taxa + ph + I(ph^2)
## Model 2: fert ~ taxa + ph + I(ph^2) + I(ph^3)
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
## 1     13 4223.0                              
## 2     12 3249.4  1     973.6 3.5955 0.08226 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AIC(model0, model1, model2, model3, model4, model5) 
##        df      AIC
## model0  3 176.7832
## model1  4 166.0134
## model2  5 164.0392
## model3  7 167.3677
## model4  6 161.3244
## model5  7 158.6070
hist(model5$residuals) #kinda normal 

plot(model5) #not much data, but residuals look okay 

# lm.crust <- lm(fert~taxa+ph+I(ph^2)+I(ph^3), na.action = na.exclude) #over fit
lm.crust <- lm(fert~taxa+ph+I(ph^2))

Crustacean - Generate predictions from fitted model to plot

summary(lm.crust)
## 
## Call:
## lm(formula = fert ~ taxa + ph + I(ph^2))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.647  -2.847   2.097   7.278  26.587 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -3781.13    1850.50  -2.043 0.061838 .  
## taxacopepod    64.97      12.18   5.335 0.000135 ***
## taxacrab       49.93      15.61   3.199 0.006981 ** 
## ph            977.86     485.91   2.012 0.065366 .  
## I(ph^2)       -62.87      31.87  -1.973 0.070146 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18.02 on 13 degrees of freedom
## Multiple R-squared:  0.7098, Adjusted R-squared:  0.6204 
## F-statistic: 7.947 on 4 and 13 DF,  p-value: 0.001808
predict.crust <- predict(lm.crust, interval = 'confidence') 
predict.crust.df <- cbind(as.data.frame(predict.crust),
        subset(fert.data.3, Phylum=="Crustacean")$pH.experim,
        subset(fert.data.3, Phylum=="Crustacean")$Taxa) %>%
  purrr::set_names(c("fit", "lwr", "upr", "ph", "taxa"))

predict.crust.df$taxa  <- factor(predict.crust.df$taxa, levels=c("copepod", "crab", "amphipod"))

Crustacean - Plot fertilization data with fitted models

fert.data.3 %>%
  filter(Phylum=="Crustacean") %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization, group=Taxa, text=`Common name`)) + 
  geom_jitter(size=1.2, width=0.03) +
  #facet_wrap(~Phylum, scales="free") + theme_minimal() +
  #geom_smooth(method="lm", se=F, aes(col=Taxa), formula=y ~ poly(x, 2, raw=TRUE)) +
  geom_line(aes(pH.experim, predict.crust.df$fit, col=predict.crust.df$taxa)) +
  ggtitle("Crustacea") + 
  xlab("Experimental pH") + ylab("Fertilization %") +
  scale_color_manual(name="Taxa",
      values=c(amphipod="#e41a1c", 
               copepod="#4daf4a",
               crab="#ff7f00")) +
  theme_minimal()

fert.data.3 %>%
  filter(Phylum=="Crustacean") %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization)) + 
  geom_jitter(size=1.2, width=0.03) +
  #facet_wrap(~Phylum, scales="free") + theme_minimal() +
  geom_smooth(method="lm", se=F, col="#4daf4a", size=0.6) +
  #geom_line(aes(pH.experim, predict.crust.df$fit, col=predict.crust.df$taxa)) +
  ggtitle("Crustacea") + 
  xlab("Experimental pH") + ylab("Fertilization %") +
  theme_minimal()

Plot all 4 taxa at once

How many papers per taxa?

fert.data.3 %>%
  group_by(Taxa) %>% 
  summarise(count=n_distinct(Author))
## # A tibble: 12 x 2
##    Taxa        count
##    <fct>       <int>
##  1 abalone         2
##  2 amphipod        2
##  3 clam            5
##  4 copepod         4
##  5 coral           5
##  6 crab            2
##  7 mussel          4
##  8 oyster          4
##  9 Sand dollar     1
## 10 scallop         3
## 11 Sea star        2
## 12 Sea urchin     13

Call plots and save

library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
#library(grid)

plot.mollusca <- fert.data.3 %>%
  filter(Phylum=="Mollusca") %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization, group=Taxa, text=`Common name`)) + 
  geom_jitter(size=1.2, width=0.03) +
  #facet_wrap(~Phylum, scales="free") + theme_minimal() +
  #geom_smooth(method="lm", se=F, aes(col=Taxa), formula=y ~ poly(x, 2, raw=TRUE)) +
  geom_line(aes(pH.experim, predict.mollusc.df$fit, col=predict.mollusc.df$taxa)) +
  ggtitle("Mollusca") + 
  xlab("Experimental pH") + ylab("Fertilization %") +
  scale_color_manual(name="Taxa (n = # studies)",
      values=c(abalone="#e41a1c",mussel="#4daf4a",scallop="#ff7f00",oyster="#984ea3",clam='#377eb8'),
      labels = c("abalone (n=2)", "mussel (n=4)", "scallop (n=3)", "oyster (n=4)", "clam (n=5)")) +
  theme_minimal()
ggsave(filename = "fert.mollusca.pdf", width = 6, height = 4)

plot.echin <- fert.data.3 %>%
  filter(Phylum=="Echinodermata") %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization, group=Taxa, text=`Common name`)) + 
  geom_jitter(size=1.2, width=0.03) +
  #facet_wrap(~Phylum, scales="free") + theme_minimal() +
  #geom_smooth(method="lm", se=F, aes(col=Taxa), formula=y ~ poly(x, 2, raw=TRUE)) +
  geom_line(aes(pH.experim, predict.echino.df$fit, col=predict.echino.df$taxa)) +
  ggtitle("Echinodermata") + 
  xlab("Experimental pH") + ylab("Fertilization %") +
  scale_color_manual(name="Taxa (n = # studies)",
      values=c(`Sand dollar`="#e41a1c", 
               `Sea star`="#4daf4a",
               `Sea urchin`="#ff7f00"),
      labels = c("Sand dollar (n=1)", "Sea star (n=2)", "Sea urchin (n=13)")) + 
  theme_minimal()
ggsave(filename = "fert.echinodermata.pdf", width = 6, height = 4)

plot.cnid <- fert.data.3 %>%
  filter(Phylum=="Cnidaria") %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization, group=Taxa, text=`Common name`)) + 
  geom_jitter(size=1.2, width=0.03) +
  #facet_wrap(~Phylum, scales="free") + theme_minimal() +
  geom_smooth(method="lm", se=F, col="#4daf4a", size=0.6) +
  #geom_line(aes(pH.experim, predict.cnid.df$fit)) +
  ggtitle("Cnidaria (n=5 studies)") + 
  xlab("Experimental pH") + ylab("Fertilization %") +
  theme_minimal()
ggsave(filename = "fert.cnidaria.pdf", width = 5, height = 4)
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).
plot.cnid.overfit <- fert.data.3 %>%
  filter(Phylum=="Cnidaria") %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization, group=Taxa, text=`Common name`)) + 
  geom_jitter(size=1.2, width=0.03) +
  #facet_wrap(~Phylum, scales="free") + theme_minimal() +
  #geom_smooth(method="lm", se=F, aes(col=Taxa), formula=y ~ poly(x, 2, raw=TRUE)) +
  geom_line(aes(pH.experim, predict.cnid.df$fit)) +
  ggtitle("Cnidaria - overfit (n=5 studies)") + 
  xlab("Experimental pH") + ylab("Fertilization %") +
  theme_minimal()
ggsave(filename = "fert.cnidaria.overfit.pdf", width = 5, height = 4)
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_path).
plot.crust <- fert.data.3 %>%
  filter(Phylum=="Crustacean") %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization)) + 
  geom_jitter(size=1.2, width=0.03) +
  #facet_wrap(~Phylum, scales="free") + theme_minimal() +
  geom_smooth(method="lm", se=F, col="#4daf4a", size=0.6) +
  #geom_line(aes(pH.experim, predict.crust.df$fit, col=predict.crust.df$taxa)) +
  ggtitle("Crustacea (n=8 studies)") + 
  xlab("Experimental pH") + ylab("Fertilization %") +
  theme_minimal()
ggsave(filename = "fert.crustacea.pdf", width = 5, height = 4)

plot.crust.overfit <- fert.data.3 %>%
  filter(Phylum=="Crustacean") %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization, group=Taxa, text=`Common name`)) + 
  geom_jitter(size=1.2, width=0.03) +
  #facet_wrap(~Phylum, scales="free") + theme_minimal() +
  #geom_smooth(method="lm", se=F, aes(col=Taxa), formula=y ~ poly(x, 2, raw=TRUE)) +
  geom_line(aes(pH.experim, predict.crust.df$fit, col=predict.crust.df$taxa)) +
  ggtitle("Crustacea - overfit?") + 
  xlab("Experimental pH") + ylab("Fertilization %") +
  scale_color_manual(name="Taxa (n = # studies)",
      values=c(amphipod="#e41a1c", 
               copepod="#4daf4a",
               crab="#ff7f00"),
      labels = c("amphipod (n=2)", "copepod (n=4)", "crab (n=2)")) +
  theme_minimal()  
ggsave(filename = "fert.crustacea.overfit.pdf", width = 6, height = 4)

pdf("fert.all.pdf", height = 11, width = 15)
grid.arrange(plot.mollusca, plot.echin,  plot.cnid, plot.crust, plot.cnid.overfit,  plot.crust.overfit, ncol=2)
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).

## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_path).
dev.off()
## quartz_off_screen 
##                 2

Other analyses not included in paper

Test insemination minutes

Insemination.mins 0.42319 -0.90604 0.3439 0.000999 ***

# plot % fert ~ pH.experim by Phylum
ggplotly(fert.data.3 %>%
ggplot(mapping=aes(x=Insemination.mins, y=Perc.Fertilization, group=Phylum, col=Phylum, text=`Common name`)) + 
  geom_point(size=1.5, width=0.02) +
  #facet_wrap(~Taxa) +
  geom_smooth(method="lm", se=TRUE, aes(fill=Phylum)) +
  ggtitle("Fertilization Rate ~ Insemination minutes by phylum"))
## Warning: Ignoring unknown parameters: width
## Warning: Removed 65 rows containing non-finite values (stat_smooth).
summary(aov(Perc.Fertilization ~ factor(Phylum)*Insemination.mins, fert.data.3)) # not sign. alone, but sign for some phyla 
##                                   Df Sum Sq Mean Sq F value   Pr(>F)    
## factor(Phylum)                     2  22015   11007  14.131 1.18e-06 ***
## Insemination.mins                  1      6       6   0.008   0.9288    
## factor(Phylum):Insemination.mins   2   6310    3155   4.050   0.0182 *  
## Residuals                        394 306910     779                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 65 observations deleted due to missingness

Test egg pre-exposure time

egg.pre.exp.time 0.82583 0.56392 0.2503 0.001998 **

# plot % fert ~ pH.experim by Phylum
ggplotly(fert.data.3 %>%
ggplot(mapping=aes(x=log(egg.pre.exp.time), y=Perc.Fertilization, group=Phylum, col=Phylum, text=`Common name`)) + 
  geom_point(size=1.5, width=0.02) +
  #facet_wrap(~Taxa) +
  geom_smooth(method="lm", se=TRUE, aes(fill=Phylum)) +
  ggtitle("Fertilization Rate ~ Egg pre-exposure minutes by phylum"))
## Warning: Ignoring unknown parameters: width
## Warning: Removed 266 rows containing non-finite values (stat_smooth).
fert.data.3$egg.pre.exp.time.log <- log(fert.data.3$egg.pre.exp.time+1)
summary(fert.data.3$egg.pre.exp.time.log)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   0.000   2.773   1.991   2.773   7.273     156
summary(aov(Perc.Fertilization ~ factor(Phylum)*egg.pre.exp.time.log, fert.data.3)) # not sign. alone, but sign for some phyla
##                                      Df Sum Sq Mean Sq F value   Pr(>F)
## factor(Phylum)                        2   2712    1356   1.911 0.149717
## egg.pre.exp.time.log                  1    528     528   0.744 0.389184
## factor(Phylum):egg.pre.exp.time.log   1   8413    8413  11.855 0.000656
## Residuals                           304 215740     710                 
##                                        
## factor(Phylum)                         
## egg.pre.exp.time.log                   
## factor(Phylum):egg.pre.exp.time.log ***
## Residuals                              
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 156 observations deleted due to missingness

Test sperm concentration

Sperm.per.mL 0.81881 0.57406 0.2492 0.001998 **

ggplotly(fert.data.3 %>%
ggplot(mapping=aes(x=log(Sperm.per.mL+1), y=Perc.Fertilization, group=Phylum, col=Phylum, text=`Common name`)) + 
  geom_point(size=1.5, width=0.02) +
  #facet_wrap(~Taxa) +
  geom_smooth(method="lm", se=TRUE, aes(fill=Phylum)) +
  ggtitle("Fertilization Rate ~ sperm concentration (log-trans) by phylum"))
## Warning: Ignoring unknown parameters: width
## Warning: Removed 204 rows containing non-finite values (stat_smooth).
fert.data.3$Sperm.per.mL.log <- log(fert.data.3$Sperm.per.mL+1)
summary(aov(Perc.Fertilization ~ factor(Phylum)*Sperm.per.mL.log, fert.data.3))  # not sign. alone, but sign for some phyla
##                                  Df Sum Sq Mean Sq F value   Pr(>F)    
## factor(Phylum)                    2  29032   14516  27.612 1.40e-11 ***
## Sperm.per.mL.log                  1    537     537   1.022    0.313    
## factor(Phylum):Sperm.per.mL.log   2  23492   11746  22.343 1.15e-09 ***
## Residuals                       255 134059     526                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 204 observations deleted due to missingness

Same plot as above, but shapes = pH group

pH groups: – 6-7.3 – 7.3-7.5 – 7.5-7.8 – 7.8-8.2

ggplotly(fert.data.3 %>%
ggplot(mapping=aes(x=log(Sperm.per.mL+1), y=Perc.Fertilization, group=Phylum:pH.group, col=Phylum:pH.group, text=`Common name`, shape=pH.group)) + 
  geom_point(size=1.5, width=0.02) +
  #facet_wrap(~Taxa) +
  geom_smooth(method="lm", se=TRUE, aes(fill=Phylum:pH.group)) +
  ggtitle("Fertilization Rate ~ sperm concentration (log-trans) by phylum"))
## Warning: Ignoring unknown parameters: width
## Warning: Removed 204 rows containing non-finite values (stat_smooth).
## Warning: Factor `Phylum:pH.group` contains implicit NA, consider using
## `forcats::fct_explicit_na`

## Warning: Factor `Phylum:pH.group` contains implicit NA, consider using
## `forcats::fct_explicit_na`

Test sperm: ratio

sperm.egg 0.20294 0.97919 0.0953 0.038961 *

# plot % fert ~ pH.experim by Phylum
ggplotly(fert.data.3 %>%
ggplot(mapping=aes(x=sperm.egg, y=Perc.Fertilization, group=Phylum, col=Phylum, text=`Common name`)) + 
  geom_point(size=1.5, width=0.02) +
  #facet_wrap(~Taxa) +
  geom_smooth(method="lm", se=TRUE, aes(fill=Phylum)) +
  ggtitle("Fertilization Rate ~ sperm:egg ratio by phylum"))
## Warning: Ignoring unknown parameters: width
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
summary(aov(Perc.Fertilization ~ factor(Phylum)*sperm.egg, fert.data.3))  # sign. main and interaction effects 
##                           Df Sum Sq Mean Sq F value   Pr(>F)    
## factor(Phylum)             2  16503    8252   29.06 1.16e-11 ***
## sperm.egg                 35 135929    3884   13.68  < 2e-16 ***
## factor(Phylum):sperm.egg   1   9519    9519   33.52 3.09e-08 ***
## Residuals                180  51119     284                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 246 observations deleted due to missingness

Test number of females used in assays

n.females 0.90918 0.41640 0.2463 0.001998 **

# plot % fert ~ pH.experim by Phylum
ggplotly(fert.data.3 %>%
ggplot(mapping=aes(x=n.females, y=Perc.Fertilization, group=Phylum, col=Phylum, text=`Common name`)) + 
  geom_point(size=1.5, width=0.02) +
  #facet_wrap(~Taxa) +
  geom_smooth(method="lm", se=TRUE, aes(fill=Phylum)) +
  ggtitle("Fertilization Rate ~ No. females used for assay, by phylum"))
## Warning: Ignoring unknown parameters: width
## Warning: Removed 97 rows containing non-finite values (stat_smooth).
summary(aov(Perc.Fertilization ~ factor(Phylum)*n.females, fert.data.3))  # sign. main effect, not interaction  
##                           Df Sum Sq Mean Sq F value   Pr(>F)    
## factor(Phylum)             2  14574    7287   9.605 8.62e-05 ***
## n.females                  1  12866   12866  16.957 4.74e-05 ***
## factor(Phylum):n.females   2   2094    1047   1.380    0.253    
## Residuals                362 274654     759                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 97 observations deleted due to missingness

Full model, all factors explored above.

summary(aov(Perc.Fertilization ~ factor(Phylum)*(pH.experim + Insemination.mins + egg.pre.exp.time.log + Sperm.per.mL.log + n.females), fert.data.3))  #  
##                                      Df Sum Sq Mean Sq F value   Pr(>F)
## factor(Phylum)                        2   3959    1979   7.094 0.001069
## pH.experim                            1   3416    3416  12.242 0.000582
## Insemination.mins                     1    153     153   0.548 0.459852
## egg.pre.exp.time.log                  1   1474    1474   5.284 0.022615
## Sperm.per.mL.log                      1   4207    4207  15.079 0.000142
## n.females                             1   7482    7482  26.815 5.68e-07
## factor(Phylum):pH.experim             2    995     497   1.782 0.171047
## factor(Phylum):Insemination.mins      1  10689   10689  38.308 3.64e-09
## factor(Phylum):egg.pre.exp.time.log   1   2883    2883  10.333 0.001536
## factor(Phylum):Sperm.per.mL.log       1   3397    3397  12.173 0.000603
## factor(Phylum):n.females              1    794     794   2.847 0.093164
## Residuals                           190  53015     279                 
##                                        
## factor(Phylum)                      ** 
## pH.experim                          ***
## Insemination.mins                      
## egg.pre.exp.time.log                *  
## Sperm.per.mL.log                    ***
## n.females                           ***
## factor(Phylum):pH.experim              
## factor(Phylum):Insemination.mins    ***
## factor(Phylum):egg.pre.exp.time.log ** 
## factor(Phylum):Sperm.per.mL.log     ***
## factor(Phylum):n.females            .  
## Residuals                              
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 261 observations deleted due to missingness